library(nycflights13)
library(tidyverse)
library(knitr)
library(ggthemes)
# Toy example: two small tables that share a single key column, `ID`.
t1 <- tribble(
  ~ID,    ~valor,
  "A001", 10,
  "A002", 20,
  "A003", 50
)
t2 <- tribble(
  ~ID,    ~populacao,
  "A001", 3000,
  "A002", 2500,
  "A003", 6000
)

# Keep every row of t1 and attach the matching `populacao` from t2.
t1 %>%
  left_join(t2, by = "ID")
# Most frequent aircraft model among United (UA) departures on 2013-09-16.
# The flights are filtered first, then matched to `planes` by tail number;
# counting with sort = TRUE puts the most common model in the first row.
flights %>%
  filter(year == 2013, month == 9, day == 16, carrier == "UA") %>%
  left_join(planes, by = "tailnum") %>%
  count(model, sort = TRUE) %>%
  slice(1) %>%
  pull(model)
## [1] "757-222"
# Total number of seats offered per month on the JFK -> ATL route.
#
# The route filter runs before the join so that only the matching flights are
# joined to `planes`. The result is unchanged because `origin` and `dest` come
# from `flights` and are not touched by a join on `tailnum`; this also matches
# the filter-then-join order of the UA query earlier in this file.
#
# Flights whose tail number has no match in `planes` get `seats = NA`, which
# `na.rm = TRUE` drops, so the totals only cover flights with a known plane.
flights %>%
  filter(origin == "JFK", dest == "ATL") %>%
  left_join(planes, by = "tailnum") %>%
  group_by(month) %>%
  summarise(total_assentos = sum(seats, na.rm = TRUE))
# Number of flights per airline, busiest first. Joining `airlines` by carrier
# code brings in the `name` column so it can be shown next to the code.
flights %>%
  left_join(airlines, by = "carrier") %>%
  group_by(carrier, name) %>%
  tally(sort = TRUE)
# Toy example with a compound key: rows are matched on `ID` together with
# `ano`, and the two tables do not cover the same (ID, ano) pairs.
t1 <- tribble(
  ~ID,    ~ano, ~valor,
  "A001", 2019, 10,
  "A001", 2020, 12,
  "A002", 2020, 17,
  "A003", 2019, 50,
  "A003", 2020, 64
)
t2 <- tribble(
  ~ID,    ~ano, ~populacao,
  "A001", 2019, 3000,
  "A002", 2019, 2500,
  "A002", 2020, 1900,
  "A003", 2019, 6000
)

# Left join on both key columns: every row of t1 is kept, and `populacao` is
# NA wherever t2 has no row for that (ano, ID) pair.
t1 %>%
  left_join(t2, by = c("ano", "ID"))
Queremos dois bancos de dados:
# Inner join: only the (ID, ano) pairs present in both t1 and t2.
t1 %>%
  inner_join(t2, by = c("ID", "ano"))

# Full join: every (ID, ano) pair found in either table; missing values are NA.
t1 %>%
  full_join(t2, by = c("ID", "ano"))
# Preview the first rows of the two tables that are joined next.
head(flights)
head(weather)
# Mean precipitation (rounded to 2 decimals) per day of December, taken over
# the flights departing from LGA: each flight is matched to the weather record
# for its origin airport and scheduled hour.
#
# The filter runs before the join so that only December LGA flights are joined
# to `weather`. `origin` and `month` are join keys, so filtering first gives
# the same rows as filtering afterwards, and it matches the filter-then-join
# order of the anti-join checks later in this file.
#
# Flights with no matching weather record get `precip = NA` and are ignored by
# `na.rm = TRUE`.
flights %>%
  filter(origin == "LGA", month == 12) %>%
  left_join(weather, by = c("year", "month", "day", "hour", "origin")) %>%
  group_by(day) %>%
  summarise(precip_media = round(mean(precip, na.rm = TRUE), 2))
# December LGA flights that have no weather record for their airport and
# hour, counted per day.
flights %>%
  filter(month == 12, origin == "LGA") %>%
  anti_join(weather, by = c("year", "month", "day", "hour", "origin")) %>%
  count(day)
# The reverse check: December LGA weather records for hours in which no
# flight is recorded, counted per day.
weather %>%
  filter(month == 12, origin == "LGA") %>%
  anti_join(flights, by = c("year", "month", "day", "hour", "origin")) %>%
  count(day)
A visibilidade afeta o número de partidas por hora?
# Number of departures per airport and hour, attached to the hourly weather
# records. The right join keeps every weather row, so hours with a weather
# record but no flights appear with `n = NA`.
flights %>%
  group_by(year, month, day, origin, hour) %>%
  tally() %>%
  right_join(weather, by = c("year", "month", "day", "origin", "hour"))
# Scatter plot of the mean number of departures per hour at each visibility
# level. Hours that have a weather record but no flights carry `n = NA` after
# the right join and are dropped by `na.rm = TRUE`, so each mean is taken over
# hours with at least one departure.
flights %>%
  count(year, month, day, origin, hour) %>%
  right_join(weather, by = c("year", "month", "day", "origin", "hour")) %>%
  group_by(visib) %>%
  summarise(media_n = mean(n, na.rm = TRUE)) %>%
  ggplot(aes(x = visib, y = media_n)) +
  geom_point()
# One row per (origin, dest) pair, with all of that route's flights packed
# into a list-column.
flights %>%
  group_by(origin, dest) %>%
  nest()
# One row per airport and hour, with that hour's flights nested in a
# list-column and the matching weather record attached alongside.
# NOTE(review): `year` is not part of the key here (it stays inside the nested
# data), unlike the other weather joins in this file; that is only safe if
# both tables cover a single year -- confirm.
flights %>%
  group_by(origin, month, day, hour) %>%
  nest() %>%
  left_join(weather, by = c("month", "day", "origin", "hour"))